clear all
set more off
cd "/Users/sitianliu/Library/CloudStorage/Dropbox/burning_glass/data/"

*--------------- Skill Categorization ---------------*

use temp, clear

keep if year >= 2018
drop if year == 2023 & month > 9
drop post 

gen post=0
replace post=1 if year>=2020 
replace post=0 if year==2020 & month>=1 & month<=3

** Restrict the sample to high-WFH-adoption occupations: above p75
sum dremote, d	
keep if dremote>=0.15 & dremote <.

merge 1:1 bgtjobid using temp_skill 
keep if _merge==3
drop _merge

g ln_population=ln(emp)

gen cognitive = 0
gen interpersonal = 0
gen character = 0
gen writing = 0
gen customer = 0
gen project_mgmt = 0
gen people_mgmt = 0
gen financial = 0
gen computer = 0
gen software = 0
gen physical = 0

foreach num of numlist 1(1)20 {
ren skill`num' skill 

merge m:1 skill using skill_skillclusterfamily
drop if _merge==2
drop _merge 

merge m:1 skill using skill_skillcluster
drop if _merge==2
drop _merge 

ren skill skill`num'
ren skillclusterfamily skillclusterfamily`num'
ren skillclusterfamily_id skillclusterfamily_id`num'
ren skillcluster skillcluster`num'
}

** Use  Deming and Kahn (2018)'s definition:

foreach num of numlist 1(1)20 {
ren skill`num' skill
ren skillcluster`num' skillcluster
ren skillclusterfamily`num' skillclusterfamily

replace cognitive = 1 if regexm(skill,"Problem Solving") | regexm(skill,"Research") | regexm(skill,"Analytical") | regexm(skill,"Critical Thinking") | regexm(skill,"Math") | regexm(skill,"Statistics") 

replace interpersonal = 1 if regexm(skill,"Communication") | regexm(skill,"Negotiation") | regexm(skill,"Presentation") | regexm(skill,"Relationships") | regexm(skill,"Teamwork") | regexm(skill,"Collaboration")

replace character = 1 if regexm(skill,"Organizational") | regexm(skill,"Detail-Oriented") | regexm(skill,"Multi-Tasking") | regexm(skill,"Time Management") | regexm(skill,"Meeting Deadlines") | regexm(skill,"Energetic")

replace writing = 1 if regexm(skill,"Writing")

replace customer = 1 if regexm(skill,"Sales") | regexm(skill,"Customer") | regexm(skill,"Claim") | regexm(skill,"Client") | regexm(skill,"Patient") | regexm(skill,"Guest")
	* regexm(skill,"E-Commerce") | regexm(skill,"Telemarketing") | 

replace project_mgmt = 1 if regexm(skill,"Project Management") | regexm(skill,"Project Planning") 

replace people_mgmt = 1 if regexm(skill,"Supervisory") | regexm(skill,"Leadership") | regexm(skill,"Mentoring") | regexm(skill,"Staff") | regexm(skill,"People Development") | regexm(skill,"People Management") | regexm(skill,"Conflict Management") | regexm(skill,"Human Resource Management") | regexm(skill,"Personnel Management") | regexm(skill,"Employee")

replace financial = 1 if regexm(skill,"Budgeting") | regexm(skill,"Accounting") | regexm(skill,"Finance") | regexm(skill,"Finantial") | regexm(skill,"Cost") | regexm(skill,"Tax") | regexm(skill,"Audit") | regexm(skill,"Risk") | regexm(skill,"Mortgage") 

replace computer = 1 if regexm(skill,"Computer") | regexm(skill,"Spreadsheets") 
replace computer = 1 if skillcluster=="Advanced Microsoft Excel" | skillcluster=="Anti-Malware Software" |  skillcluster=="Application Programming Interface (API)" | skillcluster=="Basic Computer Knowledge" |  skillcluster=="Enterprise Resource Planning (ERP)" | skillcluster=="Geographic Information System (GIS) Software" |  skillcluster=="Information Security" | skillcluster=="Management Information System (MIS)" |  skillcluster=="Microsoft Office and Productivity Tools" | skillcluster=="Microsoft Windows" |  skillcluster=="Operating Systems" | skillcluster=="Project Management Software" |  skillcluster=="SAP" | skillcluster=="Firmware" | skillcluster=="Middleware" | skillcluster=="Cloud Computing" | skillcluster=="Cloud Solutions" | skillcluster=="NoSQL Databases" | skillcluster=="Oracle" | skillcluster=="Systems Administration" |   skillcluster=="Technical Support" | skillcluster=="Telecommunications" | skillcluster=="Web Content" |  skillcluster=="Web Design" | skill=="Microsoft Excel" | skill=="Microsoft Power BI" | skill=="Microsoft Word"

replace software = 1 if skillcluster=="Statistical Software" | skillcluster=="Mathematical Software" | skillcluster=="Business Intelligence Software" | skillcluster=="Data Visualization" | skillcluster=="Java" | skillcluster=="JavaScript and jQuery" | skillcluster=="SQL Databases and Programming" | skillcluster=="C and C++" | skillcluster=="Cache (computing)"| skillcluster=="Distributed Computing" | skillcluster=="Other Programming Languages" | skillcluster=="PHP Web" | skillcluster=="Scripting" | skillcluster=="Scripting Languages" | skillcluster=="Software Development Methodologies" | skillcluster=="Software Development Principles" | skillcluster=="Software Development Tools" | skillcluster=="Software Quality Assurance" | skillcluster=="System Design and Implementation" | skillcluster=="Microsoft Development Tools"

replace physical = 1 if regexm(skill,"Physical Abilities")

ren skill skill`num'
ren skillcluster skillcluster`num' 
ren skillclusterfamily skillclusterfamily`num'

}

save temp_gelbach.dta, replace

*-------------------------- Gelbach Decomposition -------------------------- 

** Contributions to Change in UWP between 2018-19 and 2022-23 (Figure 4b)

use  temp_gelbach.dta, clear
drop if year == 2020 | year == 2021

* Baseline equation (Equation 3)
reghdfe ln_hrlysalary i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, replace keep(i.post#c.ln_population) ctitle (Basic) 
lincom 1.post#c.ln_population
g uwp_original=r(estimate)

*** Note: the variable ln_population is ln employment size!***********

* Fully specified equation (Equation 4) 
reghdfe ln_hrlysalary i.post##c.ln_population i.post##i.cognitive##c.ln_population i.post##i.interpersonal##c.ln_population i.post##i.character##c.ln_population i.post##i.writing##c.ln_population i.post##i.customer##c.ln_population i.post##i.project_mgmt##c.ln_population i.post##i.people_mgmt##c.ln_population i.post##i.financial##c.ln_population i.post##i.computer##c.ln_population i.post##i.software##c.ln_population i.post##i.physical##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population i.post#i.cognitive#c.ln_population i.post#i.interpersonal#c.ln_population i.post#i.character#c.ln_population i.post#i.writing#c.ln_population i.post#i.customer#c.ln_population i.post#i.project_mgmt#c.ln_population i.post#i.people_mgmt#c.ln_population i.post#i.financial#c.ln_population i.post#i.computer#c.ln_population i.post#i.software#c.ln_population i.post#i.physical#c.ln_population) ctitle (Complete) 
	* Coefficient on each skill dummy*ln_popultion*after is reported in Table A8 in the Appendix

* Auxiliary regressions (Equation 5)	
g post_cognitive_population=post*cognitive*ln_population
g post_interpersonal_population=post*interpersonal*ln_population
g post_character_population=post*character*ln_population
g post_writing_population=post*writing*ln_population
g post_customer_population=post*customer*ln_population
g post_project_population=post*project_mgmt*ln_population
g post_people_population=post*people_mgmt*ln_population
g post_financial_population=post*financial*ln_population
g post_computer_population=post*computer*ln_population
g post_software_population=post*software*ln_population
g post_physical_population=post*physical*ln_population


reghdfe post_cognitive_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Cognitive) 
reghdfe post_interpersonal_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Interpersonal) 
reghdfe post_character_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Character) 
reghdfe post_writing_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Writing) 
reghdfe post_customer_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Customer) 
reghdfe post_project_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Project) 
reghdfe post_people_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (People) 
reghdfe post_financial_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Financial) 
reghdfe post_computer_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Computer) 
reghdfe post_software_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Software) 
reghdfe post_physical_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Physical) 
	

** Listing Intensity (Equation 7): Table 4
reghdfe cognitive i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, replace keep(c.ln_population i.post#c.ln_population) ctitle (Cognitive) 
reghdfe interpersonal i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Interpersonal) 
reghdfe character i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Character) 
reghdfe writing i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Writing)
reghdfe customer i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Customer)
reghdfe project_mgmt i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Project)
reghdfe people_mgmt i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (People)
reghdfe financial i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Financial)
reghdfe computer i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Computer)
reghdfe software i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Software)
reghdfe physical i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_intensity.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Physical)


** Contributions to Change in UWP between 2018-19 and 2020-23 (Figure 4a)

use  temp_gelbach.dta, clear

* Baseline equation (Equation 3)
reghdfe ln_hrlysalary i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Basic) 
lincom 1.post#c.ln_population
g uwp_original=r(estimate)

* Fully specified equation (Equation 4)
reghdfe ln_hrlysalary i.post##c.ln_population i.post##i.cognitive##c.ln_population i.post##i.interpersonal##c.ln_population i.post##i.character##c.ln_population i.post##i.writing##c.ln_population i.post##i.customer##c.ln_population i.post##i.project_mgmt##c.ln_population i.post##i.people_mgmt##c.ln_population i.post##i.financial##c.ln_population i.post##i.computer##c.ln_population i.post##i.software##c.ln_population i.post##i.physical##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population i.post#i.cognitive#c.ln_population i.post#i.interpersonal#c.ln_population i.post#i.character#c.ln_population i.post#i.writing#c.ln_population i.post#i.customer#c.ln_population i.post#i.project_mgmt#c.ln_population i.post#i.people_mgmt#c.ln_population i.post#i.financial#c.ln_population i.post#i.computer#c.ln_population i.post#i.software#c.ln_population i.post#i.physical#c.ln_population) ctitle (Complete) 

* Auxiliary regressions (Equation 5)	
g post_cognitive_population=post*cognitive*ln_population
g post_interpersonal_population=post*interpersonal*ln_population
g post_character_population=post*character*ln_population
g post_writing_population=post*writing*ln_population
g post_customer_population=post*customer*ln_population
g post_project_population=post*project_mgmt*ln_population
g post_people_population=post*people_mgmt*ln_population
g post_financial_population=post*financial*ln_population
g post_computer_population=post*computer*ln_population
g post_software_population=post*software*ln_population
g post_physical_population=post*physical*ln_population


reghdfe post_cognitive_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Cognitive) 
reghdfe post_interpersonal_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Interpersonal) 
reghdfe post_character_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Character) 
reghdfe post_writing_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Writing) 
reghdfe post_customer_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Customer) 
reghdfe post_project_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Project) 
reghdfe post_people_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (People) 
reghdfe post_financial_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Financial) 
reghdfe post_computer_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Computer) 
reghdfe post_software_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Software) 
reghdfe post_physical_population i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
	outreg2 using gelbach_all.xls, append keep(i.post#c.ln_population) ctitle (Physical) 
	

** Listing Density (Equation 7): Results are not reported in the paper but consistent with Table 4.
reghdfe cognitive i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Cognitive) 
reghdfe interpersonal i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Interpersonal) 
reghdfe character i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Character) 
reghdfe writing i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Writing)
reghdfe customer i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Customer)
reghdfe project_mgmt i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Project)
reghdfe people_mgmt i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (People)
reghdfe financial i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Financial)
reghdfe computer i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Computer)
reghdfe software i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Software)
reghdfe physical i.post##c.ln_population, absorb(month edu onet_group naics_group jobhours_group taxterm_group) cluster(metrofips)
		outreg2 using gelbach_density.xls, append keep(c.ln_population i.post#c.ln_population) ctitle (Physical)

** Summary Statistics (Mean in Table 4)

use temp_gelbach.dta, clear

	estpost sum cognitive if post == 0
	esttab using gelbach_sum.csv, cells("count mean sd") replace 
	
foreach v of varlist interpersonal-physical {
	estpost sum `v' if post == 0
	esttab using gelbach_sum.csv, cells("count mean sd") append
}


** Graph (Figure 4)

** Use the estimates from Equations (3), (4), and (5) above to compute the contribution of each skill to the overall
** decline in the UWR, based on the formula in Equation (6). 
** Save these computed numbers to excel file "gelbach_results"

import excel "gelbach_results.xlsx", sheet("Sheet1") firstrow clear

format %6.4g contribution
gen percentage = 100 * contribution

replace skill = "Social" if skill == "Interpersonal"
replace skill = "Customer and Client Support" if skill == "Customer"
replace skill = "Project Management" if skill == "Project"
replace skill = "People Management" if skill == "People"

graph hbar (mean) percentage if period == "2022", over(skill, sort(1) descending) graphregion(color(white)) bar(1,color(black)) ytitle("Contribution to Decrease in Urban Wage Premium (in Percentage)" "between 2018-19 and 2022-23",size(small)) ylab(0(20)100)
graph export "gelbach_all_2022.png", replace

graph hbar (mean) percentage if period == "all", over(skill, sort(1) descending) graphregion(color(white)) bar(1,color(black))  ytitle("Contribution to Decrease in Urban Wage Premium (in Percentage)" "between 2018-19 and 2020-23",size(small)) ylab(0(20)100)
graph export "gelbach_all.png", replace


